In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scripts import import_group_epa, unit_conversion
from joblib import Parallel, delayed
In [6]:
# NOTE(review): the __main__ guard is presumably here so joblib's worker
# processes can be started safely on platforms that spawn new interpreters
# -- confirm before removing.
if __name__ == '__main__':
    # Folder that holds one cleaned EPA emissions CSV per year.
    base_path = os.path.join('Clean data', 'EPA emissions')

    # One file path per year, 2001 through 2016 (the range end is exclusive).
    paths = [
        os.path.join(base_path, 'EPA emissions ' + str(year) + '.csv')
        for year in range(2001, 2017)
    ]

    # Run import_group_epa on every yearly file in parallel; n_jobs=-1 asks
    # joblib to use all available CPUs. df_list receives one result per path.
    df_list = Parallel(n_jobs=-1)(
        delayed(import_group_epa)(csv_path) for csv_path in paths
    )
In [7]:
# Stack the per-year results row-wise into a single frame. The original index
# labels of each piece are kept (no ignore_index), so labels may repeat.
df = pd.concat(df_list, axis=0)
In [8]:
# Preview the first five rows of the combined frame.
df.head(n=5)
Out[8]:
In [9]:
# Preview the last five rows of the combined frame.
df.tail(n=5)
Out[9]:
In [10]:
# Destination for the combined emissions table.
path = os.path.join('Clean data', 'Monthly EPA emissions.csv')

# Write the frame as CSV; index=False leaves the row index out of the file.
df.to_csv(path_or_buf=path, index=False)